data_sumtable <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ0_table")
data_acc <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ1_acc")
data_imp_features <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ2_Important_features", na = "NA")
data_dimreduction <-
readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ3_reducing")
data_PROBAST <- readxl::read_excel(path = "PROBAST_assessment.xlsx", sheet = "Main")
colnames(data_PROBAST) <- data_PROBAST[1,]
data_PROBAST <- data_PROBAST[2:14,]
# Replace spaces and hyphens in column names with _
colnames(data_acc) <- gsub(" ","_",colnames(data_acc))
colnames(data_imp_features) <- gsub(" - ","_",colnames(data_imp_features))
colnames(data_imp_features) <- gsub(" ","_",colnames(data_imp_features))
colnames(data_dimreduction) <- gsub(" ","_",colnames(data_dimreduction))
## Step 1: Modify table entries
# Column: Responders/nonresponders
data_sumtable$`Responders/nonresponders` <- gsub(" responders, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonresponders","",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" remitters, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonremitters","",data_sumtable$`Responders/nonresponders`)
# Add proportion of nonresponders
# proportion_nonresponders <- character(nrow(data_sumtable))
# row_idx = 8
# for (row_idx in 1:nrow(data_sumtable)){
# strsplit(data_sumtable$`Responders/nonresponders`[row_idx], "\n")
# if
# entry
# entry <- data_sumtable$`Responders/nonresponders`[row_idx]
# responders <- strsplit(data_sumtable$`Responders/nonresponders`[row_idx], c("; \r\n"))[[1]][1]
# nonresponders <- strsplit(entry, "/")[[1]][2]
# proportion <- round(as.numeric(nonresponders)/(as.numeric(responders)+as.numeric(nonresponders))*100,0)
# entry <- paste(data_sumtable$`Responders/nonresponders`[row_idx]," (",proportion,"%)", sep = "")
# }
# Column: Treatment
data_sumtable$Treatment <- gsub("atypical antipsychotics","AAP",data_sumtable$Treatment)
#data_sumtable$treatment <- gsub("Alpha2-receptor-antagonists","AAP",data_sumtable$treatment)
# Column: Information on models tested
data_sumtable$`Information on models tested` <- gsub(" of models tested","",data_sumtable$`Information on models tested`)
# Column: Definition of treatment outcome
data_sumtable$`Definition of treatment outcome` <- gsub("\\s*\\([^\\)]+\\)","",data_sumtable$`Definition of treatment outcome`)
# Column: Input features
data_sumtable$`Type of functional-connectivity-based input features` <- gsub("[(]wb:.*)","",data_sumtable$`Type of functional-connectivity-based input features`)
# Column: FC estimation
data_sumtable$`Way of estimating the underlying functional connectivities`<- gsub("Group-information guided","Gig",data_sumtable$`Way of estimating the underlying functional connectivities`)
# Column: Algorithms
data_sumtable$`Algorithm(s) of the final classifier(s)`<- gsub("graph convolutional network","GCN",data_sumtable$`Algorithm(s) of the final classifier(s)`)
## Step 2: delete columns (Age group and Year)
data_sumtable$`Age group`<- NULL
data_sumtable$Year <- NULL
## Step 3: change column names
colnames(data_sumtable) <- gsub("Sample size","N",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Accuracy_rounded","Best Acc",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Responders/nonresponders","Responders/ nonresponders",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Definition of treatment outcome","Definition treatment outcome",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Type of functional-connectivity-based input features","Input features",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Way of estimating the underlying functional connectivities","Estimating FCs",colnames(data_sumtable))
## Step 4: turn values of numeric variables (N and Best ACC) into strings
data_sumtable$N <- as.character(data_sumtable$N)
data_sumtable$`Best Acc` <- as.character(data_sumtable$`Best Acc`)
# More info about settings in flextable: https://ardata-fr.github.io/flextable-book/cell-content.html
# Set defaults
set_flextable_defaults(font.family = "Arial",
font.size = 8,
padding.bottom = 3,
padding.top = 3,
padding.left = 0.5,
paddings.right = 0.5,
#theme_fun = "theme_apa",
theme_fun = NULL,
text.align = "left",
line_spacing = 1.5)
# Initialize flex_table
apa_sumtable <- flextable(data_sumtable)
# Set table properties
apa_sumtable <- set_table_properties(apa_sumtable, width = 1, layout = "autofit")
# Save flextable to word
margins <- page_mar(
bottom = 0.5,
top = 0.5,
right = 0.5,
left = 0.5,
header = 0.5,
footer = 0.5,
gutter = 0.5
)
format_table <- prop_section(
page_size = page_size(orient = "landscape"),
page_margins = margins)
flextable::save_as_docx(apa_sumtable, path = "plots/table_1_extraction.docx", pr_section = format_table)
data_sumtable
The mean accuracy of the best model per study is 80%, with a range of 61% - 90%. The “balanced mean accuracy” is 76%, with a range of 58% - 89%.
# Add column for ROB to data_acc using information from 4.8. in data_PROBAST
studies_low_ROB <- c(data_PROBAST[data_PROBAST$`4.8 Were model overfitting and optimism in model performance accounted for?` == "Y",c("Study")])$Study
data_acc$ROB <- ifelse(data_acc$Study %in% studies_low_ROB, "low risk of data leakage", "high risk of data leakage")
# Bring data_acc into longformat
data_acc_long <- reshape(data = data_acc, idvar = "Study", varying = c("Accuracy_rounded","Accuracy_controlled"), v.name = "metric", times = c("Accuracy_rounded","Accuracy_controlled"), new.row.names = 1:1000, direction = "long")
data_acc_long$time <- as.factor(data_acc_long$time)
data_acc_long$time <- factor(data_acc_long$time, levels = c("Accuracy_rounded", "Accuracy_controlled"))
# Plot data without risk of bias
labels_acc_germ <- c("berichtete Genauigkeit","für Zufallsniveau\nkontrollierte Genauigkeit")
labels_acc_eng <- c("reported\naccuracy","balanced\naccuracy")
plot_acc_contr_violin <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
geom_violin() +
geom_quasirandom(size = size_dots) +
stat_summary(
geom = "point",
fun.y = "mean",
col = "black",
size = size_dots,
shape = 24,
fill = "red"
)+
labs(x="", y = "Accuracy in %")+
scale_x_discrete(labels = labels_acc_eng)
# Plot data with risk of bias
plot_acc_contr_violin_rob <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
geom_violin() +
geom_quasirandom(size = size_dots-0.5, aes(colour = `ROB`)) +
scale_colour_manual(name = "Risk of bias", values=c("grey69","black"))+
stat_summary(
geom = "point",
fun.y = "mean",
col = "black",
size = size_dots -0.5,
shape = 24,
fill = "red"
)+
labs(x="", y = "Accuracy in %")+
theme(legend.position = "bottom")+
theme(legend.title=element_blank()) +
scale_x_discrete(labels = labels_acc_eng)+
guides(color = guide_legend(nrow = 2, byrow = TRUE))
# Display and Save both plots
plot_acc_contr_violin
plot_acc_contr_violin_rob
ggsave("plots/violin_acc_normal_and_controlled.svg", plot_acc_contr_violin)
ggsave("plots/violin_acc_normal_and_controlled_rob.svg",plot_acc_contr_violin_rob, height = 4)
The mean sample size was N = 75, with a range of [18, 144].
# Add column for type of treatment manually to data_acc
data_acc$Treatment <- c("rTMS","medication","rTMS","medication","ECT","medication","medication and psychotherapy","ECT","medication","ECT","medication","psychotherapy","psychotherapy")
# Plot settings
size_half = size_dots * 1.5
pos_half_vert = 0.4
pos_half_horiz = 0.2
# Plot sample size and balanced accuracy
plot_acc_contr_sample_size <- ggplot(data = data_acc, aes(y =`Accuracy_controlled`, x = `Sample_size`))+
theme(text = element_text(family = "Arial"))+
geom_point(aes(color = `Treatment`), size = size_dots)+ #draw point for all treatments
geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
label = "\u25D7",
aes(`Sample_size`, `Accuracy_controlled`,colour = "medication"),
size= size_half,
hjust = pos_half_horiz,
vjust = pos_half_vert,
family = "Lucida Sans Unicode",
key_glyph = draw_key_blank)+
geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
label = "\u25D6",
aes(`Sample_size`, `Accuracy_controlled`,colour = "psychotherapy"),
size= size_half,
hjust = pos_half_horiz + 0.57,
vjust= pos_half_vert,
family = "Lucida Sans Unicode",
key_glyph = draw_key_blank)+
scale_color_manual(
breaks = c("medication", "ECT","psychotherapy","rTMS"),
values = c("ECT" = color_pal[1], "medication" = color_pal[2], "medication and psychotherapy" = "grey","psychotherapy" = color_pal[3], "rTMS" = color_pal[4])
)+
geom_smooth(method = "lm", color = "black", size = 0.5)+
labs(y = "Balanced accuracy in %", x = "Sample size")+
scale_x_continuous(limits=c(15,147), breaks = c(25,50,75,100,125,150)) +
theme(legend.position = "bottom")+
theme(legend.title=element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE))
ggsave("plots/plot_samplesize.svg",plot_acc_contr_sample_size, height = 4)
# Combine both plots (Accuracy with risk of bias and Sample size and balanced accuracy)
plot_combined <- plot_acc_contr_violin_rob + plot_acc_contr_sample_size + plot_annotation(tag_levels = "A") + plot_layout(ncol = 2)
plot_combined
cairo_pdf("plots/figure_2_acc_sample_size.pdf", height = 3.5)
plot_combined
dev.off()
## png
## 2
ggsave("plots/figure_2_acc_sample_size.svg",plot_combined, height = 3.5)
data_imp_features_clean <- data_imp_features[!(is.na(data_imp_features$Features_with_high_predictive_value)),]
12 out of 13 studies made a statement on feature importance.
# Add new rows when two categories were chosen
data_imp_features_clean_way <- data_imp_features_clean
for (row_idx in 1:nrow(data_imp_features_clean)){
entry <- data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[row_idx]
if (grepl(";",entry)){
categories <- strsplit(entry,"; ")[[1]]
# Copy row
data_imp_features_clean_way <- rbind(data_imp_features_clean_way,data_imp_features_clean_way[row_idx,])
# Replace category in old row
data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[row_idx]<-categories[1]
# Replace category in new row
data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[nrow(data_imp_features_clean_way)]<-categories[2]
}
}
# Plot data
plot_measure_imp <- ggplot2::ggplot(data = data_imp_features_clean_way,aes(x = `Way_of_measuring_predictive_value_categories`, color = `Study`))+
geom_bar() +
theme(legend.position = "none")+
xlab("")+
coord_flip()+
ggtitle("Ways to measure high predictive value")
ggplotly(plot_measure_imp)
# Get column names
cols_tested <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_tested")]
cols_important <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_important")]
# First: Reshape tested-columns
data_imp_features_clean$ID = rownames(data_imp_features_clean)
data_ROIs_tested_long <- reshape(data = data_imp_features_clean,idvar = "Study", new.row.names = 1:20000,
varying = cols_tested, v.name = "tested", times = cols_tested, drop = cols_important, direction = "long")
colnames(data_ROIs_tested_long)[colnames(data_ROIs_tested_long)=="time"] <- 'ROI'
data_ROIs_tested_long$ROI <- gsub("_tested","",data_ROIs_tested_long$ROI)
data_ROIs_tested_long$ID <- paste(data_ROIs_tested_long$`Study`,data_ROIs_tested_long$ROI)
# Second: Reshape important-columns
data_ROIs_important_long <- reshape(data = data_imp_features_clean , idvar = "ID",new.row.names = 1:20000,
varying = cols_important, v.name = "important", times = cols_important, direction = "long", drop = cols_tested)
data_ROIs_important_long$ROI <- gsub("_important","",data_ROIs_important_long$time)
data_ROIs_important_long$ID <- paste(data_ROIs_important_long$`Study`,data_ROIs_important_long$ROI)
# Third: Merge both data sets
data_ROIs <- merge(data_ROIs_tested_long,data_ROIs_important_long)
data_ROIs$time <- gsub("_important","",data_ROIs$time)
colnames(data_ROIs)[colnames(data_ROIs)=="time"] <- 'Region'
# Exclude ROIs, when they were not tested
data_ROIs_only_tested <- data_ROIs[!c(data_ROIs$tested == "n"|is.na(data_ROIs$tested)|data_ROIs$tested == "NA"),]
# Convert to factor
data_ROIs_only_tested$important <- as.factor(data_ROIs_only_tested$important)
data_ROIs_only_tested$Region <-
gsub("_"," ",data_ROIs_only_tested$Region)
# Wide data set with absolute and relative freq per region
freq_all <- as.data.frame(table(data_ROIs_only_tested$Region))
data_ROIs_only_tested_important <- data_ROIs_only_tested[data_ROIs_only_tested$important=="y",]
data_ROIs_only_tested_nonimportant <- data_ROIs_only_tested[data_ROIs_only_tested$important=="n",]
freq_important <- as.data.frame(table(data_ROIs_only_tested_important$Region))
colnames(freq_important) <- c("Var1","Abs_frequency")
freq_nonimportant <- as.data.frame(table(data_ROIs_only_tested_nonimportant$Region))
colnames(freq_nonimportant) <- c("Var1","Freq_nonimportant")
freq_all <- merge(freq_all,freq_important,by= "Var1")
freq_all <- merge(freq_all,freq_nonimportant,by= "Var1")
freq_all$Rel_frequency <- round((freq_all$Abs_frequency/freq_all$Freq)*100)
# Get features with relative frequencies above 30% and order them according to frequency
temp_dataset <- freq_all[freq_all$Rel_frequency>30, c("Var1","Rel_frequency")]
temp_dataset <- arrange(temp_dataset,Rel_frequency)
Regions_high_freq <- temp_dataset$Var1
data_ROIs_only_tested$Region <- factor(data_ROIs_only_tested$Region, levels = Regions_high_freq)
This plot shows whose connectivities were particularily important for the prediction of TR.
# Reduce data set to only high frequency regions
data_ROIs_only_tested_high_freq <- data_ROIs_only_tested[data_ROIs_only_tested$Region %in% Regions_high_freq,]
plot_feat_pred_value <- ggplot2::ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
geom_bar()+
#theme(axis.text.x = element_text(angle = 90))+
scale_fill_brewer(palette = "Oranges")+
geom_text(aes(label = scales::percent(..count../tapply(..count.., ..x.. ,sum)[..x..])),stat = "count", position= position_stack(vjust = 0.5))+
xlab("")+
coord_flip()
#https://stackoverflow.com/questions/55680449/ggplot-filled-barplot-with-percentage-labels
# Change labels
scaleFUN <- function(x) x*100
legend_predvalue_eng <- c("no predictive value", "predictive value")
# Plot
plot_feat_pred_value_2 <- ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
geom_bar(aes (y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
#theme(axis.text.x = element_text(angle = 90))+
scale_fill_brewer(palette = "Oranges", labels = legend_predvalue_eng)+
scale_y_continuous(labels=scaleFUN)+
geom_text(stat = "count", aes(label = paste0("n=",..count..)), position = position_fill(vjust =0.5))+
xlab("")+
ylab("Relative Frequency in %")+
theme(legend.title=element_blank(),legend.position = "right") +
#theme (plot.margin=unit (c (8,5.5,5.5,5.5),units = "pt"))+
#scale_fill_manual()+
coord_flip()
plot_feat_pred_value_2
# Save plot
cairo_pdf("plots/figure_3_predictive_value.pdf", height = 3.5)
plot_feat_pred_value_2
dev.off()
## png
## 2
ggsave("plots/figure_3_predictive_value.svg",plot_feat_pred_value_2,height = 4)
# Bring data into longformat
cols_reduction <- colnames(data_dimreduction)[4:9]
data_dimreduction_long <- reshape(data = data_dimreduction, idvar = "Study",varying = cols_reduction, v.name = "applied", times = cols_reduction, new.row.names = 1:1000, direction = "long")
# Plot for feature generation
plot_reduction_generation <- ggplot(data = data_dimreduction_long[(!(is.na(data_dimreduction_long$applied))& data_dimreduction_long$time %in% c("atlas-based_parcellation", "data-driven_parcellation", "theory-based_ROI-/connectivity-selection")),],aes(x = `time`, color = `Study`))+
geom_bar() +
coord_flip() +
xlab("")+
theme(legend.position = "none", plot.title = element_text(size =14))+
ggtitle("Feature generation")
ggplotly(plot_reduction_generation)
# Plot for feature extraction
cols_feat_extract <- colnames(data_dimreduction)[7:9]
plot_reduction_extraction <- ggplot2::ggplot(data = data_dimreduction_long[(!(is.na(data_dimreduction_long$applied))& data_dimreduction_long$time %in% cols_feat_extract),],aes(x = `time`, color = `Study`))+
geom_bar() +
theme(legend.position = "none", plot.title = element_text(size=14))+
coord_flip() +
xlab("")+
ggtitle("Feature selection")
ggplotly(plot_reduction_extraction)
# Final rating columns
cols_final_rating <- c("Final rating domain 1", "Final rating domain 2", "Final rating domain 3", "Final rating domain 4", "Final rating")
data_PROBAST_plot <- data_PROBAST[c("Study",cols_final_rating)]
# Bring data into long-format
data_PROBAST_plot_long <- reshape(data = data_PROBAST_plot,idvar = "Study", new.row.names = 1:20000,varying = cols_final_rating, v.name = "ROB", times = cols_final_rating, direction = "long")
colnames(data_PROBAST_plot_long)[colnames(data_PROBAST_plot_long)=="time"] <- 'Rating_domain'
# Order and rename factor rating_domain
PROBAST_domains_eng <- c("ROB Sample","ROB Predictors", "ROB Outcome", "ROB Analysis", "ROB Total")
data_PROBAST_plot_long$Rating_domain <- factor(data_PROBAST_plot_long$Rating_domain,
levels = c("Final rating domain 1", "Final rating domain 2", "Final rating domain 3", "Final rating domain 4", "Final rating"), labels = PROBAST_domains_eng)
scaleFUN <- function(x) x*100
# Prepare final rating domains labels to make one label bold
breaks <- levels(data_PROBAST_plot_long$Rating_domain)
labels <- as.expression(breaks)
labels[[5]] <- bquote(bold(.(labels[[5]])))
# Plot data
legend_PROBAST_eng <- c("high", "low")
plot_PROBAST <- ggplot(data = data_PROBAST_plot_long,aes(x = `Rating_domain`, fill = `ROB`))+ geom_bar(aes (alpha = Rating_domain == "ROB Gesamt", y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
coord_flip()+
scale_fill_manual(values = c("red3","chartreuse3"), name = "Risk of bias (ROB)", labels = legend_PROBAST_eng <- c("high", "low")) +
scale_y_continuous(labels=scaleFUN)+
scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = 0.6), guide = F)+
scale_x_discrete(label = labels, breaks = breaks)+
theme(legend.position = "top")+
xlab("")+
ylab("Relative Accuracy in %")
# Save plot
plot_PROBAST
ggsave("plots/figure_S1_PROBAST.svg",plot_PROBAST, height = 4.5, width = 9)